package com.test.pipleline;

import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;

/**
 * Spring component that configures and launches the WebMagic image crawler.
 *
 * <p>The crawler is seeded with a single listing page, processes pages with
 * {@code ImageProcesserPipeline}, hands extracted results to
 * {@code DownLoadPipeline}, and keeps its URL queue in a file-backed scheduler
 * so the queue state is stored on disk under {@link #URL_CACHE_DIR}.
 *
 * @author lizhilong
 * @create 2020/1/6 20:01
 */
@Component
public class SpiderStarter {

    /** Seed page the crawl starts from. */
    private static final String SEED_URL = "http://www.win4000.com/zt/gaoqing_1.html";

    /** Number of worker threads the spider uses. */
    private static final int WORKER_THREADS = 5;

    /**
     * Directory used by the file-cache scheduler for its queue files.
     * NOTE(review): this is a Windows-only absolute path; consider making it
     * configurable if the crawler must run on other platforms.
     */
    private static final String URL_CACHE_DIR = "C:\\Imageurl\\";

    /** Size hint passed to the bloom-filter based duplicate remover. */
    private static final int BLOOM_FILTER_EXPECTED_INSERTIONS = 100000;

    /**
     * Builds the spider and starts the crawl asynchronously.
     *
     * <p>This method returns as soon as the crawl has been launched; it does
     * not wait for the crawl to finish. The spider is configured with
     * {@code setExitWhenComplete(true)}, so it shuts itself down once there
     * is nothing left to fetch.
     */
    public void run() {
        Spider picSpider = Spider.create(new ImageProcesserPipeline())
                .addUrl(SEED_URL)
                .thread(WORKER_THREADS)
                .addPipeline(new DownLoadPipeline())
                .setScheduler(new FileCacheQueueScheduler(URL_CACHE_DIR)
                        .setDuplicateRemover(new BloomFilterDuplicateRemover(BLOOM_FILTER_EXPECTED_INSERTIONS)))
                .setExitWhenComplete(true);

        // start() launches the crawl on a background thread and returns at once.
        // Do NOT call stop() right after it: that races with the worker thread and
        // either aborts the crawl before any page is fetched or silently fails.
        // Termination is already handled by setExitWhenComplete(true) above.
        picSpider.start();
    }
}
